/*==================================================
project:       Cleans up SISBEN for use
Author:        David L. Vargas 
E-email:       davidvar@iadb.org
url:           
Dependencies:  
----------------------------------------------------
Creation Date:    15 Aug 2022 - 10:58:49
Modification Date:   
Do-file version:    01
References:          
Output:             
==================================================*/

 /*==================================================
               0: Program set up
 ==================================================*/
 *Written on STATA 17
 drop _all
 set varabbrev off	// no variable abbreviations allowed (personal preference)
 
 /*==================================================
               1: loading data
 ==================================================*/
 
 *============ 1.0.1: Prepare additional data 
  use "${dir2}/survey_data/COL_SISBEN_Hogar_CP_IDB.dta", clear
  
  keep id_hogar_SISBEN pondera
  
  rename id_hogar_SISBEN id_hogar
  gen survey_sample = 1
  
  tempfile surv_id
  save `surv_id', replace 
 
 *============ 1.1: Load data from crypted
 use "${dir8}/SISBEN/SISBEN_IV_20211012_sampling_frame.dta", clear
  
 *----------1.2 labelling and renaming (raw data)
 
 lab var id_hogar				"ID household"
 lab var id_persona				"Individual ID"
 lab var id_vivienda			"Dwelling ID"
 
 
 lab var ind_conyuge_vive_hogar 		"Lives w/ partner" 	
 lab var ind_padre_vive_hogar 			"Lives with parent" 	
 lab var tip_uso_sanitario  			"Kind of WC" 		
 lab var cod_clase 						"Rural/urban"	
 lab var nivel 							"Nivel SISBEN"					
 lab var cod_dpto_sf 					"Department code"			
 lab var sexo_persona_sf 				"Sex"
 lab var fec_nacimiento 				"DOB"
 
 lab var uso_vivienda				""
 lab var tip_vivienda				"Type of living place"
 lab var tip_mat_paredes			"Wall material"
 lab var tip_mat_pisos				"Floor material"
 lab var ind_tiene_energia			"Electricity"
 lab var ind_tiene_alcantarillado	"Sewerage"
 lab var ind_tiene_gas				"Natural gas"
 lab var ind_tiene_recoleccion		"Garbage collection"
 lab var ind_tiene_acueducto		"Running water"
 lab var num_cuartos_vivienda		"Number of rooms"
 lab var num_hogares_vivienda		"Number of households"
 *lab var tip_ocupa_vivienda			"The living place is (Ownership/renting)"
 lab var num_cuartos_exclusivos		"Number of rooms"
 lab var num_cuartos_dormir			"Number of bedrooms"
 lab var num_cuartos_unicos_dormir	"Number of bedroom used only for sleeping"
 lab var tip_sanitario				"WC type"
 lab var tip_ubi_sanitario			"WC location"
 lab var tip_origen_agua			"Water source"
 lab var ind_agua_llega_7dias		"Water 7 days a week"
 lab var num_dias_llega				"Number of days with water"
 lab var ind_agua_llega_24horas		"Water 24h"
 lab var num_horas_llega			"number of hours w/ water"
 lab var tip_uso_agua_beber			"Source drinking watter"
 lab var tip_elimina_basura			"Way of trash disposal"
 lab var ind_tiene_cocina			"Has kitchen"
 lab var tip_prepara_alimentos		"Place used for cooking"
 lab var tip_uso_cocina				"Exclusivity of the kitchen or cooking place"
 lab var tip_energia_cocina			"Source of energy for cooking"
 lab var ind_tiene_nevera			"Has a fridge"
 lab var ind_tiene_lavadora			"Has washing machine"
 lab var ind_tiene_pc				"Has PC"
 lab var ind_tiene_internet			"Internet connection"
 lab var ind_tiene_moto				"Has motorbike"
 lab var ind_tiene_tractor			"Has tractor"
 lab var ind_tiene_carro			"Has a car"
 lab var ind_tiene_bien_raiz		"Has real state"
 lab var ind_gasto_alimento			"Expends on food"
 lab var vlr_gasto_alimento			"Food expenditure value"
 lab var ind_gasto_transporte		"Expends on transport"
 lab var vlr_gasto_transporte		"Transport expenditure (Monthly)"
 lab var ind_gasto_educacion		"Expends on education"
 lab var vlr_gasto_educacion		"Education expenditure (Monthly)"
 lab var ind_gasto_salud			"Expends on health"
 lab var vlr_gasto_salud			"Health expenditure (Monthly)"
 lab var ind_gasto_serv_publicos	"Expends in public services"
 lab var vlr_gasto_serv_publicos	"Public services expenditure (Monthly)"
 lab var ind_gasto_celular			"Expends on cellphone"
 lab var vlr_gasto_celular			"Cellphone expenditure (Monthly)"
 lab var ind_gasto_arriendo			"Expends on rent"
 lab var vlr_gasto_arriendo			"Rent expenditure (Monthly)"
 lab var ind_gasto_otros			"Has other expenses"
 lab var vlr_gasto_otros			"Value other expenses (Monthly)"
 lab var vlr_total_gastos			"Total expenditure"
 lab var num_habita_vivienda		"Time since first move in"

 lab var num_personas_posibles		"Number of people in HH"
 lab var num_personas_hogar			"Number of people in the household"
 lab var tip_estado_civil			"Civil status"
 
 lab var ind_tuvo_hijos				"Has childern"
 lab var tip_cuidado_niños			"Children care"
 lab var ind_recibe_comida			"Eats (lunch/dinner) in the place where spends most of the time"
 lab var ind_leer_escribir			"Writes/reads"
 lab var ind_estudia				"Studies"
 lab var niv_educativo				"Education level"
 lab var grado_alcanzado			"Higher degree obtained"
 lab var vlr_ingr_col_mayor			"Value Colombia Mayor subsidy"
 lab var vlr_ingr_otro_subsidio		"Income other subsidies"
 lab var ind_fondo_pensiones		"Pension fund"
 lab var tip_actividad_mes			"Type of activity month"
 lab var num_sem_buscando			"Number of week on the look"
 lab var tip_empleado				"Type of employee"
 lab var ind_ingr_salario			"Receives a wage"
 lab var vlr_ingr_salario			"Wage value"
 lab var ind_ingr_honorarios		"Receives honoraries"
 lab var vlr_ingr_honorarios		"Amount precived in honoraries"
 lab var ind_ingr_cosecha			"Receives income for harvest"
 lab var num_mes_ingr_cosecha		"Number of months with harvest income"
 lab var vlr_ingr_cosecha			"Income percived due to harvest"
 lab var ind_ingr_pension			"Recieves pension"
 lab var vlr_ingr_pension			"Income precived for pension"
 lab var ind_ingr_remesa_pais		"Receives national transfers"
 lab var vlr_ingr_remesa_pais		"Income percived due to national transfers"
 lab var ind_ingr_remesa_exterior	"Receives international transfers"
 lab var vlr_ingr_remesa_exterior	"Income percived due to international transfers"
 lab var ind_ingr_arriendos			"Receives incomes due to property leasing"
 lab var vlr_ingr_arriendos			"Income percived due to property leasing"
 lab var ind_ingr_estado			"Recieves income from the State"
 lab var vlr_ingr_fam_accion		"Income received familias en acción"
 lab var tip_seg_social				"Type of social security"
 lab var tip_estrato_energia		"Engergy bill estrato"
 lab var tip_estrato_acueducto		"Water bill estrato"
 lab var tip_sanitario				"WC type"

 
 /*==================================================
               2: Cleaninng
 ==================================================*/
 
 
 *----------2.1 Missing definition
 
 foreach var of varlist _all {
 	cap replace `var' =  "." if `var' == "NULL"
 	cap replace `var' =  "." if `var' == ""
 }
 
  *----------2.2: clean numeric variables
 
 loc numeric "tip_vivienda tip_mat_paredes tip_mat_pisos ind_tiene_energia ind_tiene_alcantarillado ind_tiene_gas ind_tiene_recoleccion ind_tiene_acueducto num_cuartos_vivienda num_hogares_vivienda  num_cuartos_exclusivos num_cuartos_dormir num_cuartos_unicos_dormir tip_sanitario tip_ubi_sanitario tip_origen_agua ind_agua_llega_7dias num_dias_llega ind_agua_llega_24horas num_horas_llega tip_uso_agua_beber tip_elimina_basura ind_tiene_cocina tip_prepara_alimentos tip_uso_cocina tip_energia_cocina ind_tiene_nevera ind_tiene_lavadora ind_tiene_pc ind_tiene_internet ind_tiene_moto ind_tiene_tractor ind_tiene_carro ind_tiene_bien_raiz ind_gasto_alimento vlr_gasto_alimento ind_gasto_transporte vlr_gasto_transporte ind_gasto_educacion vlr_gasto_educacion ind_gasto_salud vlr_gasto_salud ind_gasto_serv_publicos vlr_gasto_serv_publicos ind_gasto_celular vlr_gasto_celular ind_gasto_arriendo vlr_gasto_arriendo ind_gasto_otros vlr_gasto_otros vlr_total_gastos num_habita_vivienda num_personas_posibles num_personas_hogar edad_calculada tip_estado_civil ind_tuvo_hijos tip_cuidado_niños ind_recibe_comida ind_leer_escribir ind_estudia niv_educativo grado_alcanzado vlr_ingr_col_mayor vlr_ingr_otro_subsidio ind_fondo_pensiones tip_actividad_mes num_sem_buscando tip_empleado ind_ingr_salario vlr_ingr_salario ind_ingr_honorarios vlr_ingr_honorarios ind_ingr_cosecha num_mes_ingr_cosecha vlr_ingr_cosecha ind_ingr_pension vlr_ingr_pension ind_ingr_remesa_pais vlr_ingr_remesa_pais ind_ingr_remesa_exterior vlr_ingr_remesa_exterior ind_ingr_arriendos vlr_ingr_arriendos ind_otros_ingresos vlr_otros_ingresos ind_ingr_estado vlr_ingr_fam_accion ind_conyuge_vive_hogar ind_padre_vive_hogar tip_seg_social tip_estrato_energia tip_estrato_acueducto tip_uso_sanitario sexo_persona_sf ind_gasto_celular cod_clase "
 
 
 
 foreach v of local numeric {
 	cap destring `v', replace // format(%12.3g) 
 	if (_rc){
 		noi di as err "`v' has not numeric characters CHECK"
 	}
 	else {
 		qui sum `v'
 		if (r(max) == 99 )			replace `v' = . if `v' == 99
 		else if (r(max) == 999)		replace `v' = . if  `v' == 999
 	}
 }
 
 
  *----------2.3: Clean dates
 
 loc dates "fec_nacimiento"
 
 foreach v of local dates {
 	replace `v' = "" if `v' == "."
 	tempvar _date 
 	gen `_date' = substr(`v',1,10)
 	gen `v'_df = date(`_date', "YMD###")
 	drop `_date'
 	format `v'_df %td
 	order `v'_df, after(`v')
 }
 
 *----------2.1: New variables 
 
  // age
 gen age_nov19 = floor((mofd(date("2019-11-01", "YMD")) - mofd(fec_nacimiento_df))/12)
 gen age_feb20 = floor((mofd(date("2020-02-01", "YMD")) - mofd(fec_nacimiento_df))/12)
 gen age_jun20 = floor((mofd(date("2020-06-01", "YMD")) - mofd(fec_nacimiento_df))/12)
 gen age_sep20 = floor((mofd(date("2020-09-01", "YMD")) - mofd(fec_nacimiento_df))/12)
 
 // number of childern 
 cap drop _aux
 gen _aux = age_nov19 < 18 
 bys id_hogar: egen n_kids_18 = total(_aux)
 drop _aux
 
 cap drop _aux
 gen _aux = age_nov19 < 15
 bys id_hogar: egen n_kids_15 = total(_aux)
 drop _aux
 
 cap drop _aux
 gen _aux = age_nov19 >= 65
 bys id_hogar: egen n_elder_65 = total(_aux)
 drop _aux
 
 // urban rural
 gen urban = (cod_clase == 1)
 lab define lurb 1 "Cabecera" 2 "Centro poblado" 3 "Rural disperso"
 lab values cod_clase lurb
 
 // PMT variables 
 gen male = (sexo_persona_sf ==1)
 gen female = (sexo_persona_sf ==2)
 lab define lsex 1 "male" 0 "female"
 lab values male lsex
 
 // WC type
 gen wc_flush = (tip_sanitario == 1)
 gen wc_pit = (tip_sanitario == 2)
 gen wc_unconnected = (tip_sanitario == 3)
 gen wc_letrine = (tip_sanitario == 4)
 gen wc_no = (tip_sanitario == 5)
 
 // Floor type
 gen floor_finished = (tip_mat_pisos == 1 | tip_mat_pisos == 2)
 gen floor_polished = (tip_mat_pisos == 1)
 gen floor_brick = (tip_mat_pisos == 2)
 gen floor_concrete = (tip_mat_pisos == 3)
 gen floor_badwood = (tip_mat_pisos == 4)
 gen floor_dirt = (tip_mat_pisos == 5)
 gen floor_other = (tip_mat_pisos == 6)
 
 // Wall type
 gen wall_finished = (tip_mat_paredes == 1)
 gen wall_adobe = (tip_mat_paredes == 2)
 gen wall_bahareque = (tip_mat_paredes == 3)
 gen wall_prefabricated = (tip_mat_paredes == 4)
 gen wall_badwood = (tip_mat_paredes == 5)
 gen wall_vegetal = (tip_mat_paredes == 6)
 gen wall_zinc = (tip_mat_paredes == 7)
 gen wall_no = (tip_mat_paredes == 0)
 
 // Members per room 
 gen members_room = num_personas_hogar / num_cuartos_vivienda
 gen members_bedroom = num_personas_hogar / num_cuartos_dormir
 gen members_bedroom_exc = num_personas_hogar / num_cuartos_unicos_dormir
 
 // has kitchen
 gen kitchen = (ind_tiene_cocina == 1)
 replace kitchen = . if ind_tiene_cocina == 9
 
 // power source cooking
 gen cookpower_electric = (tip_energia_cocina == 1)
 gen cookpower_natgas = (tip_energia_cocina == 2)
 gen cookpower_popgas = (tip_energia_cocina == 3)
 gen cookpower_petrol = (tip_energia_cocina == 4)
 gen cookpower_carbon = (tip_energia_cocina == 5)
 gen cookpower_firewood = (tip_energia_cocina == 6)
 gen cookpower_no = (tip_energia_cocina == 7)
 
 foreach v in cookpower_electric cookpower_natgas cookpower_popgas cookpower_petrol cookpower_carbon cookpower_firewood {
 	*replace `v' = . if (tip_energia_cocina == 9)
 	gen `v'_missing = (tip_energia_cocina == 9)
 }
  
 // Electricity
 gen electricity = (ind_tiene_energia == 1)
 *replace electricity = . if ind_tiene_energia == 9
 gen electricity_missing = (ind_tiene_energia == 9)
 
 // Assets 
 gen own_fridge = (ind_tiene_nevera == 1)
 replace own_fridge = . if ind_tiene_nevera == 9
 
 gen own_washing_machine = (ind_tiene_lavadora == 1)
 replace own_washing_machine = . if ind_tiene_lavadora == 9
 
 gen own_PC = (ind_tiene_pc == 1)
 replace own_PC = . if ind_tiene_pc == 9
 
 gen own_internet = (ind_tiene_internet == 1)
 replace own_internet = . if ind_tiene_internet == 9
 
 gen own_moto = (ind_tiene_moto == 1)
 replace own_moto = . if ind_tiene_moto == 9
 
 gen own_tractor = (ind_tiene_tractor == 1)
 replace own_tractor = . if ind_tiene_tractor == 9
 
 gen own_car = (ind_tiene_carro == 1)
 replace own_car = . if ind_tiene_carro == 9
 
 gen own_real_state = (ind_tiene_bien_raiz == 1)
 replace own_real_state = . if ind_tiene_bien_raiz == 9
 
 *gen own_house = (tip_ocupa_vivienda == 2 | tip_ocupa_vivienda == 3)
 
 gen married = (tip_estado_civil == 2)
 gen free_union = (tip_estado_civil == 1)
 gen in_couple = (tip_estado_civil == 2 | tip_estado_civil == 1)
 gen divorced = (tip_estado_civil == 4)
 gen single = (tip_estado_civil == 5)
 gen parents = (ind_padre_vive_hogar == 1)
 
 // Other interesting covariates 
 gen sewerage = (ind_tiene_alcantarillado == 1)
 replace sewerage = . if ind_tiene_alcantarillado == 9
 
 gen natural_gas = (ind_tiene_gas == 1)
 replace natural_gas = . if ind_tiene_gas == 9
 
 gen trash_collection = (ind_tiene_recoleccion == 1)
 replace trash_collection = . if ind_tiene_recoleccion == 9
 
 gen running_water = (ind_tiene_acueducto == 1)
 replace running_water = . if ind_tiene_acueducto == 9
 
  ** House ownership
 /*
 gen house_rent = (tip_ocupa_vivienda == 1)
 gen house_morgage = (tip_ocupa_vivienda == 2)
 gen house_own = (tip_ocupa_vivienda == 3)
 gen house_permission = (tip_ocupa_vivienda == 4)
 gen house_defacto = (tip_ocupa_vivienda == 5) */
 
  ** Water access
 gen water_7days = (ind_agua_llega_7dias == 1)
 *replace water_7days = . if ind_agua_llega_7dias == 9
 gen water_7days_missing = (ind_agua_llega_7dias == 9)
 rename num_dias_llega water_ndays
 gen water_24h = (ind_agua_llega_24horas == 1)
 *replace water_24h = . if ind_agua_llega_24horas == 9
 gen water_24h_missing = (ind_agua_llega_24horas == 9)
 rename num_horas_llega water_nhours
  ** Drinking Water type
 gen water_drink_tap = (tip_uso_agua_beber == 1)
 gen water_drink_boil = (tip_uso_agua_beber == 2)
 gen water_drink_chlorine = (tip_uso_agua_beber == 3)
 gen water_drink_filter = (tip_uso_agua_beber == 4)
 gen water_drink_decanted = (tip_uso_agua_beber == 5)
 gen water_drink_bottle = (tip_uso_agua_beber == 6)
  ** Trash disposal type
 gen trash_service = (tip_elimina_basura == 1)
 gen trash_buried = (tip_elimina_basura == 2)
 gen trash_burn = (tip_elimina_basura == 3)
 gen trash_throw = (tip_elimina_basura == 4)
 gen trash_water = (tip_elimina_basura == 5)
 gen trash_informal = (tip_elimina_basura == 6)
 gen trash_other = (tip_elimina_basura == 7)
 
  ** Shared kitchen
 gen shared_kitchen = (tip_uso_cocina == 2 | tip_uso_cocina == 3)
 *replace shared_kitchen = . if tip_uso_cocina == 9 
 gen shared_kitchen_missing = (tip_uso_cocina == 9)
 
  ** Expenditure
 gen d_exp_food = (ind_gasto_alimento == 1)
 *replace d_exp_food = . if ind_gasto_alimento == 9
 gen d_exp_food_missing = (ind_gasto_alimento == 9)
 
 gen d_exp_transport = (ind_gasto_transporte == 1)
 gen d_exp_transport_missing  = (ind_gasto_transporte == 9)
 *replace d_exp_transport = . if ind_gasto_transporte == 9
 
 gen d_exp_education = (ind_gasto_educacion == 1)
 gen d_exp_education_missing  = (ind_gasto_educacion == 9)
 *replace d_exp_education = . if ind_gasto_educacion == 9
 
 gen d_exp_health = (ind_gasto_salud == 1)
 gen d_exp_health_missing  = (ind_gasto_salud == 9)
 *replace d_exp_health = . if ind_gasto_salud == 9
 
 gen d_exp_public_services = (ind_gasto_serv_publicos == 1)
 gen d_exp_public_services_mis = (ind_gasto_serv_publicos == 9)
 *replace d_exp_public_services = . if ind_gasto_serv_publicos == 9
 
 gen d_exp_cellphone = (ind_gasto_celular == 1)
 gen d_exp_cellphone_missing  = (ind_gasto_celular == 9)
 *replace d_exp_cellphone = . if ind_gasto_celular == 9
 
 gen d_exp_rent = (ind_gasto_arriendo == 1)
 gen d_exp_rent_missing  = (ind_gasto_arriendo == 9)
 *replace d_exp_rent = . if ind_gasto_arriendo == 9
 
 gen d_exp_other = (ind_gasto_otros == 1)
 gen d_exp_other_missing  = (ind_gasto_otros == 9)
 *replace d_exp_other = . if ind_gasto_otros == 9
 
 rename vlr_gasto_alimento			exp_food				
 rename vlr_gasto_transporte		exp_transport		
 rename vlr_gasto_educacion			exp_education		
 rename vlr_gasto_salud				exp_health			
 rename vlr_gasto_serv_publicos		exp_public_services	
 rename vlr_gasto_celular			exp_cellphone		
 rename vlr_gasto_arriendo			exp_rent				
 rename vlr_gasto_otros				exp_other			
 rename vlr_total_gastos			total_expenditure
 
   ** education
 gen grade_attained = grado_alcanzado
 replace grade_attained = . if grado_alcanzado == 99
 
 gen pre_school = (niv_educativo == 1)
 gen primary = (niv_educativo == 2)
 gen middleschool = (niv_educativo == 3)
 gen highschool = (niv_educativo == 4)
 gen tecnico = (niv_educativo == 5)
 gen university = (niv_educativo == 6)
 gen postgrade = (niv_educativo == 7)
 gen no_education = (niv_educativo == 0)
 
 // gen variables (harmonic w/ other datasets) 
 gen no_edu = no_education == 1 | pre_school == 1 
 gen elementary = primary == 1 | middleschool == 1
 gen tertiary_edu = tecnico == 1 | university == 1 | postgrade == 1
 
 
 foreach v in pre_school primary middleschool highschool tecnico university postgrade no_education {
 	replace  `v' = . if niv_educativo == 9
 }
 
 ** Health related 
 gen children = (ind_tuvo_hijos == 1)
 
 ** main activity
 gen act_working = (tip_actividad_mes == 1 )
 gen act_jobseeking = (tip_actividad_mes == 2)
 gen act_study = (tip_actividad_mes == 3 )
 gen act_stayhome = (tip_actividad_mes == 4 )
 gen act_rentier = (tip_actividad_mes == 5 )
 gen act_pensioner = (tip_actividad_mes == 6 )
 gen act_disabled = (tip_actividad_mes == 7 )
 gen act_noactivity = (tip_actividad_mes == 0)
 
 foreach v in act_working act_jobseeking act_study act_stayhome act_rentier act_pensioner act_disabled act_noactivity {
 	replace `v' = . if tip_actividad_mes == 99
 }
 
 ** job seeking time 
 gen weeks_jobseeking = num_sem_buscando
 replace weeks_jobseeking = . if num_sem_buscando == 99
 
 ** Employment status (PILA and camara de comercio)
 gen ocp_employee_priv = (tip_empleado == 1)
 gen ocp_employee_gov = (tip_empleado == 2)
 gen ocp_employee_dom = (tip_empleado == 3)
 gen ocp_independent_prof = (tip_empleado == 4)
 gen ocp_independent_worker = (tip_empleado == 5)
 gen ocp_bussinesowner = (tip_empleado == 6)
 gen ocp_parcel = (tip_empleado == 7)
 gen ocp_unpaid = (tip_empleado == 8 | tip_empleado == 9 )
 gen ocp_jornalero = (tip_empleado == 10 )
 
 foreach v in ocp_employee_priv ocp_employee_gov ocp_employee_dom ocp_independent_prof ocp_independent_worker ocp_bussinesowner ocp_parcel ocp_unpaid ocp_jornalero {
 	replace `v' = . if tip_empleado == 99
 }
 
 ** social security and pensions
 gen SS_contributive = (tip_seg_social == 1)
 replace SS_contributive = . if tip_seg_social == 9
 gen SS_special = (tip_seg_social == 2)
 *replace SS_special = . if tip_seg_social == 9
 gen SS_subsidised = (tip_seg_social == 3)
 *replace SS_subsidised = . if tip_seg_social == 9
 gen SS_no = (tip_seg_social == 0)
 *replace SS_no = . if tip_seg_social == 9
 gen SS_missing = (tip_seg_social == 9)
 
 // pensioon fund 
 destring ind_fondo_pensiones, replace force // Not here but in other sample D08 as opt
 
 gen pension_fund = (ind_fondo_pensiones == 1)
 gen pensioner = (ind_fondo_pensiones == 3) 
 replace pension_fund = . if (ind_fondo_pensiones == 9) 
 replace pensioner = . if (ind_fondo_pensiones == 9) 
 gen formal_work = pension_fund * act_working
 
 // income 
 rename vlr_ingr_salario			   inc_wage			 	
 rename vlr_ingr_honorarios            inc_fees 		 	
 rename vlr_ingr_cosecha               inc_harvest 		 	
 rename vlr_ingr_pension               inc_pension 		 	
 rename vlr_ingr_remesa_pais           inc_remittance_local 
 rename vlr_ingr_remesa_exterior       inc_remittance_inter 
 rename vlr_ingr_arriendos             inc_rent 			
 rename vlr_ingr_fam_accion			  inc_familias_accion
 rename vlr_ingr_col_mayor			  inc_col_mayor
 rename vlr_ingr_otro_subsidio		  inc_other_subsidies	
 rename vlr_otros_ingresos 			  inc_other
 
// add feb 2023 (error correction on May 2023)
 * some missings are "missing" on the income variables (let's make values inherit indicators missings)
 
 foreach v in ind_ingr_salario ind_ingr_honorarios ind_ingr_cosecha ind_ingr_pension ind_ingr_remesa_pais ind_ingr_remesa_exterior ind_ingr_arriendos ind_ingr_estado ind_otros_ingresos {
 	destring `v', replace 
 	gen `v'_dnk = `v' == 9
	replace `v' = . if `v' == 9 // 9 is yes, but dnk (I'll input as missing)
	replace `v' = . if `v' == 99 // odd inconsitency 
	replace `v' = 0 if `v' == 2 // 2 is no
 } 
 
 
  replace inc_wage				= . if	missing(ind_ingr_salario)			
  replace inc_fees 		 	    = . if    missing(ind_ingr_honorarios)
  replace inc_harvest 		 	= . if    missing(ind_ingr_cosecha)
  replace inc_pension 		 	= . if    missing(ind_ingr_pension)
  *replace inc_pension 			= . if 	  pensioner != 1
  replace inc_remittance_local  = . if    missing(ind_ingr_remesa_pais)
  replace inc_remittance_inter  = . if    missing(ind_ingr_remesa_exterior)
  replace inc_rent 			    = . if    missing(ind_ingr_arriendos)  
  replace inc_familias_accion   = . if    missing(ind_ingr_estado)
  *replace inc_col_mayor         = . if    missing(ind_ingr_estado)		
  *replace inc_other_subsidies	= . if    missing(ind_ingr_estado)	
  replace inc_other 			= . if   missing(ind_otros_ingresos)
  
  
 // drop observations with odd mistakes (most likely input errors)
 foreach v in inc_wage inc_fees inc_harvest inc_pension inc_remittance_local inc_remittance_inter inc_rent inc_other inc_familias_accion inc_col_mayor inc_other_subsidies {
	replace `v' = . if `v' < 1000 & `v' != 0
 }
 
 
 // safe guard to avoid a bunch of misleading zeros
 egen work_income = rowtotal(inc_wage inc_harvest inc_fees), missing
 gen pensioner_income = inc_pension if pensioner == 1 
 gen property_income = inc_rent if ind_ingr_arriendos == 1  // only for those reporting rent as a main income
 
 // aggregate 
  // missing income if no work income or pension and does not report property as main source or 
 egen inc1_nt = rowtotal(work_income pensioner_income inc_rent inc_remittance_local inc_remittance_inter inc_rent), missing
 replace inc1_nt = . if inc1_nt == 0 & (work_income == . & pensioner_income == .) & (ind_ingr_arriendos != 1 & inc_remittance_inter != 1 & ind_ingr_remesa_exterior != 1 & ind_otros_ingresos !=1)
 lab var inc1_nt "Individual income (no transfers)"
 
 egen inc1 = rowtotal(work_income pensioner_income inc_rent inc_remittance_local inc_remittance_inter inc_rent inc_other inc_familias_accion inc_col_mayor inc_other_subsidies), missing
 replace inc1 = . if inc1 == 0 & (work_income == . & pensioner_income == .) & (ind_ingr_arriendos != 1 & inc_remittance_inter != 1 & ind_ingr_remesa_exterior != 1 & ind_otros_ingresos !=1 & ind_ingr_estado !=1)
 lab var inc1 "Individual income"
 
 bys id_hogar: egen inc_t = total(inc1), missing
 bys id_hogar: egen inc_nt_t = total(inc1_nt), missing
 lab var inc_t "Household income"
 lab var inc_nt_t "Household income (no transfers)"
  
 *----------2.2: Generating SISBEN Control
 
 * group cutoff:
 destring cod_dpto_sf, replace
 egen cutoff_fe=group(cod_dpto_sf urban)
 lab var cutoff_fe "FE cutoff"
 
 * deciles of per-capita spending (SISBEN)
 g pc_exp=total_expenditure/num_personas_hogar
 xtile pc_exp_q=pc_exp, nq(10)
 tab pc_exp_q, gen(exp_q)
 lab var pc_exp "Per capita expenditure"

 // anderson index assets
 loc z2 ""
 foreach v in own_fridge own_washing_machine own_moto own_tractor own_car own_real_state /*own_house*/ {
 	cap drop z2`v'
 	sum `v' // restrict to one obs per household 
 	loc cmean = `r(mean)'
 	loc sd 	  = `r(sd)'
 	gen z2`v' = (`v'-`cmean')/(`sd') 
 	loc z2		 "`z2' z2`v'"
 	lab var z2`v' "std `v'" 
 }
 
 icw_index `z2' , gen(iassets)
 drop `z2'
 replace iassets = iassets * 100
 lab var iassets "Index of assets"
 bys id_hogar (iassets): replace iassets = iassets[_n-1] if id_hogar[_n] == id_hogar[_n-1] //expand to all
 
 
 * Add a line of code to create ONE index variable based on assets.
 // house quality index 
 glo quality wc_flush floor_finished wall_finished kitchen cookpower_electric cookpower_natgas electricity own_fridge own_washing_machine own_PC own_internet own_moto own_car own_real_state /*own_house*/ sewerage natural_gas trash_collection water_7days water_24h

 * index (inverse covariance index)
 loc z2 ""
 foreach v in $quality {
 	cap drop z2`v'
 	sum `v' 
 	loc cmean = `r(mean)'
 	loc sd 	  = `r(sd)'
 	gen z2`v' = (`v'-`cmean')/(`sd') 
 	loc z2		 "`z2' z2`v'"
 	lab var z2`v' "std `v'" 
 }
 
 icw_index `z2', gen(iquality)
 drop `z2'
 replace iquality = iquality * 100
 lab var iquality "Index of dwelling quality"
 
  
 *----------2.3: proper labelling new variables
 
 lab var wc_flush					"WC - running water"
 lab var wc_pit						"WC - Septic tank"
 lab var wc_unconnected				"WC - unnconnected"
 lab var wc_letrine					"WC - unnconnected"
 lab var wc_no						"WC - does not have"
 lab var floor_finished				"Finished floors"
 lab var floor_brick				"Unfinished floors - brick"
 lab var floor_concrete				"Unfinished floors - concrete"
 lab var floor_badwood				"Unfinished floors - wood in poor state"
 lab var floor_dirt					"Unfinished floors - dirt"
 lab var floor_other				"Unfinished floors - other"
 lab var wall_finished				"Finished walls"
 lab var wall_adobe					"Unfinished walls - Adobe"
 lab var wall_bahareque				"Unfinished walls - Bahareque"
 lab var wall_prefabricated			"Unfinished walls - prefabricates"
 lab var wall_badwood				"Unfinished walls - wood in poor state"
 lab var wall_vegetal				"Unfinished walls - vegetal"
 lab var wall_zinc					"Unfinished walls - zinc"
 lab var wall_no					"Unfinished walls - no walls"
 lab var members_room				"HH members per room"
 lab var members_bedroom			"HH members per bedroom"
 lab var members_bedroom_exc		"HH members per exclusive bedroom"
 lab var kitchen					"Kitchen"
 lab var cookpower_electric			"Cooking power - electricity"
 lab var cookpower_natgas			"Cooking power - natural gas"
 lab var cookpower_popgas			"Cooking power - propane gas"
 lab var cookpower_petrol			"Cooking power - petrol"
 lab var cookpower_carbon			"Cooking power - carbon"
 lab var cookpower_firewood			"Cooking power - firewood"
 lab var cookpower_no				"Cooking power - no power"
 lab var electricity				"Electricty"
 lab var own_fridge					"Own fridge"
 lab var own_washing_machine		"Own  washing machine"
 lab var own_PC						"Own  PC"
 lab var own_internet				"Internet connection"
 lab var own_moto					"Own  motorbike"
 lab var own_tractor				"Own tractor"
 lab var own_car					"Own car"
 lab var own_real_state				"Own real state"
 *lab var own_house					"Own house"
 lab var married					"Head Married"
 lab var free_union					"Free union"
 lab var in_couple					"Living togheter"
 lab var divorced					"Divorced"
 lab var single						"Single"
 lab var parents					"Parents live at home"
 lab var sewerage					"Sewerage system"
 lab var natural_gas				"Natural gas"
 lab var trash_collection			"Trash colletion service"
 lab var running_water				"Running water"
 /*lab var house_rent					"House - lease"
 lab var house_morgage				"House - has morgage"
 lab var house_own					"House - own"
 lab var house_permission			"House - not own but rent free"
 lab var house_defacto				"House - de facto ownership" */
 lab var water_7days				"Running water - 7 days a week"
 lab var water_24h					"Running water - 24h a day"
 lab var water_drink_tap			"Drinking water - tap water/ as provided"
 lab var water_drink_boil			"Drinking water - boiled water"
 lab var water_drink_chlorine		"Drinking water - added chlorine"
 lab var water_drink_filter			"Drinking water - filtred"
 lab var water_drink_decanted		"Drinking water - decantes" 
 lab var water_drink_bottle			"Drinking water - bottled water"	
 lab var trash_service				"Trash disposal - sanitary service"
 lab var trash_buried				"Trash disposal - burried"
 lab var trash_burn					"Trash disposal - burned"
 lab var trash_throw				"Trash disposal - throwed away land"
 lab var trash_water				"Trash disposal - throwed away water"
 lab var trash_informal				"Trash disposal - informal service"
 lab var trash_other				"Trash disposal - other"
 lab var shared_kitchen				"Shared kitchen"
 lab var d_exp_food					"Expends on food"
 lab var d_exp_transport			"Expends on transport"
 lab var d_exp_education			"Expends on education"
 lab var d_exp_health				"Expends on health"
 lab var d_exp_public_services		"Expends in public services"
 lab var d_exp_cellphone			"Expends on cellphone"
 lab var d_exp_rent					"Expends on rent"
 lab var d_exp_other				"Has other expenses"
 lab var grade_attained				"Grade attained (1-13)"
 lab var pre_school					"Education level - preschool"
 lab var primary					"Education level - primary"
 lab var middleschool				"Education level - middleschool"
 lab var highschool					"Education level - highschool"
 lab var tecnico					"Education level - tecnico"
 lab var university					"Education level - university"
 lab var postgrade					"Education level - postgrade"
 lab var no_education				"Education level - no education"
 lab var formal_work 				"Formal work"
 lab var no_edu 					"No education"
 lab var elementary 				"Elementary education"
 lab var tertiary_edu 				"Tertiary education"
 lab var children					"Children"
 lab var act_working				"Main activity - work"
 lab var act_jobseeking				"Main activity - job seeking"
 lab var act_study					"Main activity - study"
 lab var act_stayhome				"Main activity - stay home"
 lab var act_rentier				"Main activity - rentier"
 lab var act_pensioner				"Main activity - pensioner"
 lab var act_disabled				"Main activity - disabled"
 lab var act_noactivity				"Main activity - no activity"
 lab var weeks_jobseeking			"weeks seeking job"
 lab var ocp_employee_priv			"Ocupation - private sector employee"
 lab var ocp_employee_gov			"Ocupation - government employee"
 lab var ocp_employee_dom			"Ocupation - domestic employee"
 lab var ocp_independent_prof		"Ocupation - independent profesional"
 lab var ocp_independent_worker		"Ocupation - independent worker"
 lab var ocp_bussinesowner			"Ocupation - bussines owner"
 lab var ocp_parcel					"Ocupation - land parcel explotation"
 lab var ocp_unpaid					"Ocupation - unpaid work"
 lab var ocp_jornalero				"Ocupation - day work/ jornalero"
 lab var SS_contributive			"SS - contributive"
 lab var SS_special					"SS - special"
 lab var SS_subsidised				"SS - subsidised"
 lab var SS_no						"SS - not registered"
 lab var pension_fund				"Has a pension fund"
 lab var pensioner					"Pensioner"
  

 *--- labelling
 lab var age_feb20				"Age by Feb 2020"
 lab var age_jun20				"Age by Jun 2020"
 lab var age_sep20				"Age by Sep 2020"
 lab var male					"Dummy is male"
 
 *---- minors
 rename num_personas_hogar n_members
 
 //inc percapita
 //inc percapita
 winsor2 inc_t, c(15 98) s(_w)  	// Get rid of odd values
 winsor2 inc_nt_t, c(15 98) s(_w)  	// Get rid of odd values (no transfers)
 gen pp_inc = inc_t_w/n_members
 gen pp_inc_nt = inc_nt_t_w/n_members
 *replace pp_inc = pp_inc / n_members
 *replace pp_inc_nt = pp_inc_nt / n_members
 lab var pp_inc "Per-capita income"
 lab var pp_inc_nt "Per-capita income (no transfers)" 
 
 rename act_working works_0 
 gen informal_work = works_0 == 1 & formal_work == 0 if !missing(works_0) & !missing(formal_work)
 

 *----- new transformations added on May 2023
 *----------  1.5. PMT variables transformation
 gen cookpower_connected = (cookpower_electric == 1 | cookpower_natgas == 1)
 gen living_couple = (free_union == 1 | married == 1 | in_couple==1) 
 gen prop_kids = n_kids_18 / n_members
 
 *---------- drop PIDs 
 drop num_tel_contacto_sf pri_apellido_sf seg_apellido_sf pri_nombre_sf seg_nombre_sf
 
 rename survey sampling_frame 
 
 /*==================================================
               3: Tag survey participants 
 ==================================================*/
 
 *---------- 3.1 Get the IDs
  
 merge 1:1 id_hogar using `surv_id', gen(m_surv)
 order survey_sample pondera, after(id_hogar)
 replace survey_sample = 0 if missing(survey_sample)
 
 lab var pondera "survey weights"
 
 /*==================================================
               4: Additions 
 ==================================================*/
 
 *--------- 4.1 Povertylines 
 
 //>>>>>>>> 2.1.2.1 add poverty-lines
 preserve 
 
 
 import excel "${dir2}/_aux/DANE_povlines_depto_city.xlsx", clear sheet("all") first
 
 drop if missing(depto_name) // empty rows 
 
 forv i = 2019/2021 {
	 foreach j in "" "e" {
		 
		 gen _aux = `j'povline_depto_`i' if depto_name == "Total Nacional"
		 egen nat_`j'pvl_`i' = max(_aux)
		 drop _aux 
		 
		 gen _aux = `j'povline_depto_`i' if depto_name == "Rural"
		 egen nat_rural_`j'pvl_`i' = max(_aux)
		 drop _aux 
		 
		 if ("`j'" == "e")	loc tp "Extreme poverty line"
		 else				loc tp "Poverty line"
		 
		 lab var `j'povline_depto_`i' 	"`tp' dept. - `i'"
		 lab var `j'povline_city_`i' 	"`tp' dept. capital - `i'"
		 lab var nat_`j'pvl_`i' 		"`tp' (National) - `i'"
		 lab var nat_rural_`j'pvl_`i' 	"`tp' (National - rural areas) - `i'"
		 
	 }	
 }
 
 drop if inlist(depto_name, "Total Nacional", "Rural")
 
 drop if depto_name == ""
 drop depto_name city_name
  
 destring depto_code, gen(cod_dpto_sf) 
 drop depto_code
  
 tempfile povlines
 save `povlines'
 
 restore 
 
 merge m:1 cod_dpto_sf using `povlines'
 
 * replace for national if missing 
 foreach v in nat_pvl_2019 nat_rural_pvl_2019 nat_epvl_2019 nat_rural_epvl_2019 nat_pvl_2020 nat_rural_pvl_2020 nat_epvl_2020 nat_rural_epvl_2020 nat_pvl_2021 nat_rural_pvl_2021 nat_epvl_2021 nat_rural_epvl_2021 {
		sum `v'
		replace `v' = r(mean) if _merge == 1
 }
  
  forv i = 2019/2021 {
	 foreach j in "" "e" {
		replace `j'povline_depto_`i' =  nat_`j'pvl_`i' if _merge == 1		
	 }
  }
 
 drop if _merge == 2
 drop _merge

 *--------- 4.1 PPP final consumption (source WB) 
 *--- Added on: AUG30 , 2022
 
 gen CountryCode = "COL"
 
 preserve 
 
 import excel "${dir2}/_aux/PPP_consmption_WB.xls", clear sheet("Data")  ///
 cellrange(A4) first
 
 foreach v of varlist _all {
 	loc vlab: var label `v'
	lab var `v' "PPP consumption - `vlab'"
	
	loc t= 0
	cap loc t = inrange(`vlab',1950,3000) 
	if (`t' == 1)	rename `v' ppp_`vlab'
	
 }
 
 keep CountryCode ppp_2019 ppp_2020 ppp_2021
 keep if CountryCode == "COL"
 
 tempfile ppp
 save `ppp'
 
 restore 
 
 
 merge m:1 CountryCode using `ppp', nogen
 
 drop CountryCode
 
 /*==================================================
               4: Store data 
 ==================================================*/
 
 *---------- 4.1 final save
 save "${dir3}/02_admin/SISBEN_framing_sample.dta", replace
  
 
exit
/* End of do-file */

><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><

Notes:
1.
2.
3.


Version Control:


